#!/bin/bash -e

#
# Federal University of Campina Grande
# Distributed Systems Laboratory
#
# Author: Armstrong Mardilson da Silva Goes
# Contact: armstrongmsg@lsd.ufcg.edu.br
#

#
# Data Collector Master
# 
# This program starts a hadoop benchmark and the data collectors.
#
# usage:
# master BENCHMARK CONFIGURATION_FILE
#
# Parameters:
# BENCHMARK : the benchmark to be executed.
# CONFIGURATION_FILE : the master configuration file path.
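# The file is structured as follows. Entries are read by position, so
# this order must be kept; any line containing '#' is ignored.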
# 
# TIME_BETWEEN_CHECKS=VALUE
# TERASORT_INPUT_DIRECTORY=VALUE
# TERASORT_OUTPUT_DIRECTORY=VALUE
# TERASORT_VALIDATION_DIRECTORY=VALUE
# TERASORT_DATA_AMOUNT=VALUE
# MR_BENCH_RUNS=VALUE
# DFSIO_FILE_SIZE=VALUE
# DFSIO_NUMBER_OF_FILES=VALUE
#

# Command-line arguments: the benchmark name and the configuration file path.
BENCHMARK="$1"
CONFIGURATION_FILE="$2"

# Collector settings: the script started by start_collector and the default
# interval handed to it (replaced by read_configuration).
COLLECTOR_NAME='slave_data_collector.sh'
TIME_BETWEEN_CHECKS='0'

# Terasort settings; defaults are placeholders replaced by read_configuration.
TERASORT_INPUT_DIRECTORY=''
TERASORT_OUTPUT_DIRECTORY=''
TERASORT_VALIDATION_DIRECTORY=''
# number of lines of 100 bytes generated by teragen
TERASORT_DATA_AMOUNT='0'

# MRBench settings.
MR_BENCH_RUNS='0'

# TestDFSIO settings.
DFSIO_FILE_SIZE='0'
DFSIO_NUMBER_OF_FILES='0'

# Debug log settings, used by debug_startup and debug.
DEBUG='true'
DEBUG_FILE_NAME='master.log'

# SystemTap script handed to stap by start_systemtap.
SYSTEMTAP_SCRIPT='systemtap/syscalls_elapsed.stp'

# Hadoop installation directory, taken from the environment.
HADOOP="$HADOOP_HOME"

#
# Creates the debug log file (or refreshes its timestamp) when debugging
# is enabled.
#
# Globals:
# DEBUG : debugging switch. An empty value, "false" and "0" disable
#         debugging; any other value enables it.
# DEBUG_FILE_NAME : path of the log file.
#
function debug_startup
{
	# A plain [ $DEBUG ] only tests for a non-empty string, so "false"
	# would still enable debugging; compare the value explicitly instead.
	case "$DEBUG" in
		""|false|0)
			return 0
			;;
	esac

	touch "$DEBUG_FILE_NAME"
}

#
# Appends one timestamped entry to the debug log when debugging is enabled.
#
# Parameters:
# $1 : the message to log.
#
# Globals:
# DEBUG : debugging switch. An empty value, "false" and "0" disable
#         debugging; any other value enables it.
# DEBUG_FILE_NAME : path of the log file.
#
# Each entry is written as "dd-mm-YYYY-HH-MM-SS    message" on one line.
#
function debug
{
	# A plain [ $DEBUG ] only tests for a non-empty string, so "false"
	# would still enable debugging; compare the value explicitly instead.
	case "$DEBUG" in
		""|false|0)
			return 0
			;;
	esac

	# Fold newlines into spaces so every entry stays on a single line.
	local newline=$'\n'
	local message="${1//$newline/ }"

	# TODO if the log file is too big, it must truncate to 0
	# or do something so the file does not grow without limit.
	# The message is passed quoted so glob characters in it are logged
	# literally, and timestamp and text are written in a single call.
	printf '%s    %s\n' "$(date "+%d-%m-%Y-%H-%M-%S")" "$message" >> "$DEBUG_FILE_NAME"
}

#
# Loads the master configuration from CONFIGURATION_FILE.
#
# Blank lines and lines containing '#' are ignored. The remaining lines are
# read by position (not by key name) and the text after the first '=' of
# each line is taken as the value, in this order:
#
# TIME_BETWEEN_CHECKS, TERASORT_INPUT_DIRECTORY, TERASORT_OUTPUT_DIRECTORY,
# TERASORT_VALIDATION_DIRECTORY, TERASORT_DATA_AMOUNT, MR_BENCH_RUNS,
# DFSIO_FILE_SIZE, DFSIO_NUMBER_OF_FILES
#
# Globals read: CONFIGURATION_FILE
# Globals written: CONTENT (array of the kept lines), CONFIGURATION_CONTENT
# (the kept lines joined by newlines) and the eight settings listed above.
#
# Exits with status 1 when the configuration file is missing or unreadable.
#
function read_configuration
{
	# Without this check an empty path would make the reader wait on
	# standard input instead of failing.
	if [ -z "$CONFIGURATION_FILE" ] || [ ! -r "$CONFIGURATION_FILE" ] || [ -d "$CONFIGURATION_FILE" ]; then
		echo "Cannot read configuration file: '$CONFIGURATION_FILE'" >&2
		exit 1
	fi

	CONTENT=()
	local line
	# Read line by line so values are never word-split or glob-expanded;
	# the second test keeps a last line that has no trailing newline.
	while IFS= read -r line || [ -n "$line" ]; do
		case "$line" in
			*"#"*)
				continue
				;;
		esac

		# strip leading and trailing whitespace (this also drops a CR)
		line="${line#"${line%%[![:space:]]*}"}"
		line="${line%"${line##*[![:space:]]}"}"

		if [ -n "$line" ]; then
			CONTENT+=("$line")
		fi
	done < "$CONFIGURATION_FILE"

	CONFIGURATION_CONTENT="$(printf '%s\n' "${CONTENT[@]}")"
	debug "loaded configuration = $CONFIGURATION_CONTENT"

	if [ "${#CONTENT[@]}" -lt 8 ]; then
		debug "warning: expected 8 configuration entries, found ${#CONTENT[@]}"
	fi

	# collector configuration
	TIME_BETWEEN_CHECKS="${CONTENT[0]#*=}"

	# tera sort configuration
	TERASORT_INPUT_DIRECTORY="${CONTENT[1]#*=}"
	TERASORT_OUTPUT_DIRECTORY="${CONTENT[2]#*=}"
	TERASORT_VALIDATION_DIRECTORY="${CONTENT[3]#*=}"
	TERASORT_DATA_AMOUNT="${CONTENT[4]#*=}"

	# mr bench configuration
	MR_BENCH_RUNS="${CONTENT[5]#*=}"

	# dfsio configuration
	DFSIO_FILE_SIZE="${CONTENT[6]#*=}"
	DFSIO_NUMBER_OF_FILES="${CONTENT[7]#*=}"

	debug "time between checks = $TIME_BETWEEN_CHECKS"
	debug "terasort input directory = $TERASORT_INPUT_DIRECTORY"
	debug "terasort output directory = $TERASORT_OUTPUT_DIRECTORY"
	debug "terasort validation directory = $TERASORT_VALIDATION_DIRECTORY"
	debug "terasort data amount = $TERASORT_DATA_AMOUNT"
	debug "mr bench runs = $MR_BENCH_RUNS"
	debug "dfsio file size = $DFSIO_FILE_SIZE"
	debug "dfsio number of files = $DFSIO_NUMBER_OF_FILES"
}

#
# Starts the requested benchmark in the background.
#
# Parameters:
# $1 : the benchmark to run. One of: test, teragen, terasort, teravalidate,
#      teraclean, mr, dfread, dfwrite, dfclean.
#
# Globals read: HADOOP and the TERASORT_*, MR_BENCH_* and DFSIO_* settings.
# Globals written:
# BENCHMARK : set to $1.
# COMMAND : array holding the command line that was started.
# PROCESS_PID : pid of the background benchmark process.
#
# Prints "Invalid benchmark." on stderr and exits with status 1 when the
# benchmark name is not recognized.
#
function start_benchmark
{
	BENCHMARK=$1

	debug "run $BENCHMARK"

	# The command is built as an array so that paths containing spaces stay
	# single arguments. The jar globs are left unquoted on purpose: they are
	# expanded here to the matching jar file(s).
	case "$BENCHMARK" in
		"test")
			COMMAND=(/usr/bin/firefox)
			;;
		"teragen")
			COMMAND=("$HADOOP/bin/hadoop" jar "$HADOOP"/hadoop-*examples*.jar teragen "$TERASORT_DATA_AMOUNT" "$TERASORT_INPUT_DIRECTORY")
			;;
		"terasort")
			COMMAND=("$HADOOP/bin/hadoop" jar "$HADOOP"/hadoop-*examples*.jar terasort "$TERASORT_INPUT_DIRECTORY" "$TERASORT_OUTPUT_DIRECTORY")
			;;
		"teravalidate")
			COMMAND=("$HADOOP/bin/hadoop" jar "$HADOOP"/hadoop-*examples*.jar teravalidate "$TERASORT_OUTPUT_DIRECTORY" "$TERASORT_VALIDATION_DIRECTORY")
			;;
		"teraclean")
			COMMAND=("$HADOOP/bin/hadoop" fs -rmr)
			# Directories that are not configured are left out, so the
			# configured ones are still removed.
			local directory
			for directory in "$TERASORT_INPUT_DIRECTORY" "$TERASORT_OUTPUT_DIRECTORY" "$TERASORT_VALIDATION_DIRECTORY"; do
				if [ -n "$directory" ]; then
					COMMAND+=("$directory")
				fi
			done
			;;
		"mr")
			COMMAND=("$HADOOP/bin/hadoop" jar "$HADOOP"/hadoop-*test*.jar mrbench -numRuns "$MR_BENCH_RUNS")
			;;
		"dfread")
			COMMAND=("$HADOOP/bin/hadoop" jar "$HADOOP"/hadoop-*test*.jar TestDFSIO -read -nrFiles "$DFSIO_NUMBER_OF_FILES" -fileSize "$DFSIO_FILE_SIZE")
			;;
		"dfwrite")
			COMMAND=("$HADOOP/bin/hadoop" jar "$HADOOP"/hadoop-*test*.jar TestDFSIO -write -nrFiles "$DFSIO_NUMBER_OF_FILES" -fileSize "$DFSIO_FILE_SIZE")
			;;
		"dfclean")
			COMMAND=("$HADOOP/bin/hadoop" jar "$HADOOP"/hadoop-*test*.jar TestDFSIO -clean)
			;;
		*)
			# A bare "exit" would reuse the status of echo (0) and report
			# success; fail explicitly instead.
			echo "Invalid benchmark." >&2
			exit 1
			;;
	esac

	debug "benchmark command : ${COMMAND[*]}"

	"${COMMAND[@]}" &
	PROCESS_PID=$!

	debug "started pid: $PROCESS_PID"
}

#
# Starts the data collector script in the background.
#
# Parameters:
# $1 : forwarded to the collector as its third argument (the main flow
#      passes the benchmark name).
#
# Globals read:
# COLLECTOR_NAME : path of the collector script, run with bash.
# PROCESS_PID : forwarded as the first argument.
# TIME_BETWEEN_CHECKS : forwarded as the second argument.
#
function start_collector
{
	debug "starting collector"
	# Every expansion is quoted so a path with spaces stays one word and an
	# empty value cannot shift the collector's positional arguments.
	bash "$COLLECTOR_NAME" "$PROCESS_PID" "$TIME_BETWEEN_CHECKS" "$1" &
}

#
# Starts stap in the background with the configured SystemTap script.
#
# Parameters:
# $1 : prefix of the output file; stap is given "$1.syscall" through -o
#      (the main flow passes the benchmark name).
#
# Globals read:
# SYSTEMTAP_SCRIPT : path of the script handed to stap.
# PROCESS_PID : pid handed to stap through -x.
#
function start_systemtap
{
	debug "starting systemtap"
	# Quoted so a script path with spaces stays a single argument and an
	# empty pid cannot make -x swallow the following option.
	stap "$SYSTEMTAP_SCRIPT" -x "$PROCESS_PID" -o "$1.syscall" &
}

# Both arguments are mandatory; refuse to start without them instead of
# failing later while loading the configuration or starting the benchmark.
if [ -z "$BENCHMARK" ] || [ -z "$CONFIGURATION_FILE" ]; then
	echo "usage: $0 BENCHMARK CONFIGURATION_FILE" >&2
	exit 1
fi

# TODO maybe write the machine configuration when starting
debug_startup
debug "starting master"

read_configuration

# The benchmark is started first: the collector and systemtap are both
# given the pid that start_benchmark stores in PROCESS_PID.
start_benchmark "$BENCHMARK"
start_collector "$BENCHMARK"
start_systemtap "$BENCHMARK"

# separator between the log entries of consecutive runs
debug "--------------------"
debug "--------------------"
